# %%
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
# %matplotlib inline  -- IPython magic; uncomment when running inside Jupyter
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer

# Load the raw Global Terrorism Database export; the encoding is passed
# explicitly as ISO-8859-1.
df = pd.read_csv(r"E:\projects\Global_Terrorism\globalterrorismdb_0718dist.csv", encoding='ISO-8859-1')

# %%
df.head()

# %%
df.info()

# %%
df.columns

# %%
# Raw column name -> readable name, in the order the columns should appear.
column_names = {
    'iyear': 'Year',
    'imonth': 'Month',
    'iday': 'Day',
    'extended': 'Extended',
    'country_txt': 'Country',
    'region_txt': 'Region',
    'city': 'City',
    'success': 'Success',
    'suicide': 'Suicide',
    'attacktype1_txt': 'Attack_Type',
    'targtype1_txt': 'Target_Type',
    'gname': 'Attack_Group',
    'weaptype1_txt': 'Weapon',
    'nkill': 'Number_of_Killed',
}
# Take an explicit copy of the selected columns so that df_terrorism is an
# independent frame: renaming or assigning into a slice of df raises
# SettingWithCopyWarning and may not modify the intended object.
df_terrorism = df[list(column_names)].copy().rename(columns=column_names)

# %%
df_terrorism.head()

# %%
df_terrorism.info()

# %%
# Null count per column, then the same as a percentage of all rows.
print(df_terrorism.isnull().sum())
print('\nPercentage:')
(df_terrorism.isnull().sum() / len(df_terrorism)) * 100

# %%
# Let's check the text columns having fewer than 50 unique categories.
for colname, colval in df_terrorism.items():
    if colval.dtype == object and colval.nunique() < 50:
        print(colname, ':', colval.unique())

# %% [markdown]
# Observation
# The City column has 0.23 percent null values; we can replace them with 'Unknown'.
# The Number_of_Killed column has 5.6 percent null values; we can replace these with 0.
#
# Observation
# The City column has 0.23 percent null values; we can replace them with 'Unknown'. The Number_of_Killed column has 5.6 percent null values; we can replace these with 0.
# %%
# Fill empty values in the City column with 'Unknown' and missing fatality
# counts with 0. The result is assigned back to the column instead of using
# fillna(inplace=True) on a column selection, which is chained assignment and
# is not guaranteed to update df_terrorism.
df_terrorism['City'] = df_terrorism['City'].fillna('Unknown')
df_terrorism['Number_of_Killed'] = df_terrorism['Number_of_Killed'].fillna(0)

# %%
# Recheck null values.
df_terrorism.isnull().sum()

# %%
# Convert the float Number_of_Killed column to int (safe now that NaNs are gone).
df_terrorism['Number_of_Killed'] = df_terrorism['Number_of_Killed'].astype(int)

# %%
# Report how many fully duplicated rows exist, then actually drop them.
# drop_duplicates() returns a new frame, so the result must be assigned back;
# calling it without assignment leaves df_terrorism unchanged.
print(df_terrorism.duplicated().sum())
df_terrorism = df_terrorism.drop_duplicates()

# %%
# Save the cleaned data frame to df_terrorism.csv in the working directory.
df_terrorism.to_csv('df_terrorism.csv', index=False)

# %%
# Frequency of suicide vs. non-suicide attacks.
import seaborn as sns
import matplotlib.pyplot as plt

suicide_frequency = df_terrorism['Suicide'].value_counts()
sns.barplot(x=suicide_frequency.index, y=suicide_frequency.values)
plt.xlabel('Suicide')
plt.ylabel('Frequency')
plt.title('Frequency of Suicide Rate in Attacks')
plt.show()

# %%
import seaborn as sns
import matplotlib.pyplot as plt

# countplot tallies rows per (Suicide, Success) pair, which is what the
# "Frequency" axis label promises. A barplot with y=df_terrorism.index would
# instead plot the mean row index of each group.
sns.countplot(data=df_terrorism, x='Suicide', hue='Success', palette='plasma')
plt.xlabel('Suicide')
plt.ylabel('Frequency')
plt.title('Frequency of Success Rate in Attacks')
plt.show()

# %%
# Number of recorded attacks per year, with the count printed on each bar.
plt.figure(figsize=(17,20))
ax = sns.countplot(y=df_terrorism['Year'])
ax.bar_label(ax.containers[0], label_type='edge')
plt.title('Number Of Terrorist Activities Each Year');

# %% [markdown]
# Observation
# Terrorist attacks surged from 2012 onward, with 2014 recording the highest
# number of attacks, followed by 2015 and 2016.
#
# Q: Visualize the number of terrorist activities in the top 10 cities.
#
# Observation
# Terrorist attacks surged from 2012 onward, with 2014 recording the highest number of attacks, followed by 2015 and 2016.
# Q: Visualize the number of terrorist activities in the top 10 cities.
# %%
# Top 10 cities by number of recorded attacks.
plt.figure(figsize=(17,8))
# 'Unknown' is the placeholder used for missing city names, so it is excluded
# by name rather than by assuming it is always the most frequent value.
known_cities = df_terrorism.loc[df_terrorism['City'] != 'Unknown', 'City']
top_cities = known_cities.value_counts().index[:10]
ax = sns.countplot(x='City', data=df_terrorism, order=top_cities, palette='plasma')
ax.bar_label(ax.containers[0], label_type='edge')
plt.title('Number of Terrorist Activities in Top 10 Cities')

# %%
# Top 10 countries by number of recorded attacks.
plt.figure(figsize=(17,8))
# Country has no 'Unknown' placeholder to skip, so take the first ten entries;
# slicing [1:11] here would silently drop the country with the most attacks.
top_countries = df_terrorism['Country'].value_counts().index[:10]
ax = sns.countplot(x='Country', data=df_terrorism, order=top_countries, palette='plasma')
ax.bar_label(ax.containers[0], label_type='edge')
plt.title('Number of Terrorist Activities in Top 10 Countries')

# %% [markdown]
# Q: Visualize the number of terrorist activities in the top 10 countries.
# %%
# Answer to the "top 10 countries" question: attacks per country for the ten
# countries with the most recorded attacks. (This cell previously repeated the
# top-10 cities plot.)
plt.figure(figsize=(17,8))
top_countries = df_terrorism['Country'].value_counts().index[:10]
ax = sns.countplot(x='Country', data=df_terrorism, order=top_countries, palette='plasma')
ax.bar_label(ax.containers[0], label_type='edge')
plt.title('Number of Terrorist Activities in Top 10 Countries')

# %% [markdown]
# Q: Visualize the number of terrorist activities by region for each year.
# %%
# Cross-tabulate attacks as a Year x Region count table and draw one
# overlapping (non-stacked) area per region.
region_by_year = pd.crosstab(df_terrorism['Year'], df_terrorism['Region'])
region_by_year.plot(kind='area', stacked=False, figsize=(20,10))

plt.title('Terrorist Activities by Region in Each Year', fontsize=20)
plt.xlabel('Year', fontsize=20)
plt.ylabel('Number of Attacks', fontsize=20);

# %% [markdown]
# Q: Visualize the terrorist groups using a word cloud (the size of a word
# shows how often that organization carried out attacks).
# %%
# Import the word-cloud package and its set of stopwords.
from collections import Counter
from wordcloud import WordCloud, STOPWORDS
print ('Wordcloud imported!')

# Count how many attacks are attributed to each group.
text_list = df_terrorism['Attack_Group'].tolist()
word_could_dict = Counter(text_list)
# Drop the 'Unknown' placeholder; the default avoids a KeyError when the
# data contains no 'Unknown' group.
word_could_dict.pop('Unknown', None)
print('word cloud dictionary created!\n')

# Default stopwords plus the 'Unknown' placeholder.
# NOTE: WordCloud ignores stopwords when built with generate_from_frequencies,
# so removing 'Unknown' from the counter above is what actually filters it.
custom_stop_words = ['Unknown']
stop_words = set(STOPWORDS) | set(custom_stop_words)

wc = WordCloud(stopwords=stop_words)
wc.generate_from_frequencies(word_could_dict)

# Display the cloud.
plt.figure(figsize=(14, 18))
plt.imshow(wc)
plt.axis('off')
plt.show()

# %% [markdown]
# Observation: ISIL, FMLN, and the Taliban are the three most prominent
# organizations conducting terrorist activities.
# Q: Visualize the top 10 most common target types for terrorists.
# %%
# Pie chart of the ten most frequently attacked target types.
colors_list = ['gold', 'orange', 'lightcoral', 'skyblue', 'purple', 'pink','blue','red','green','Violet']  # wedge colors
explode_list = [0.1, 0, 0, 0, 0.1, 0.1,0, 0, 0.1, 0.1]  # radial offset applied to each wedge

# Compute the counts once and reuse them for both the values and the labels.
top_targets = df_terrorism['Target_Type'].value_counts().head(10)
n_wedges = len(top_targets)

top_targets.plot(kind='pie',
                 figsize=(15, 15),
                 autopct='%1.1f%%',
                 startangle=90,
                 shadow=True,
                 pctdistance=0.5,
                 # Trim to the number of wedges so the lists still match when
                 # the data has fewer than ten target types.
                 colors=colors_list[:n_wedges],
                 explode=explode_list[:n_wedges],
                 textprops={'fontname':'Arial'},
                 wedgeprops={'linewidth': 1, 'edgecolor': 'white'},
                 labels=top_targets.index,
                 )
plt.title('Top 10 common targets')
plt.show()

# %% [markdown]
# Q: Visualize the number of persons killed by year in Pakistan. What is the relation?
# %%
# Regression plot of yearly fatalities in Pakistan: fatalities are summed per
# year first, and each yearly total is drawn with an 'x' marker.
import seaborn as sns
import matplotlib.pyplot as plt

# Keep only the incidents recorded in Pakistan.
df_pakistan = df_terrorism.loc[df_terrorism['Country'] == 'Pakistan']

# One row per year holding the total number killed in that year.
df_pakistan_grouped = df_pakistan.groupby('Year', as_index=False)['Number_of_Killed'].sum()

# Scatter of the yearly totals with a fitted regression line.
sns.regplot(data=df_pakistan_grouped, x='Year', y='Number_of_Killed', color='red', marker='x')

# Rotate the x tick labels so they are drawn vertically.
plt.xticks(rotation=90)

# Axis labels and title.
plt.title('Regression Plot of Number of Killed vs Year in Pakistan')
plt.xlabel('Year')
plt.ylabel('Number of Killed')

plt.show()

# %%
import plotly.express as px

# Same Pakistan-only yearly totals, rebuilt so this cell can run on its own.
df_pakistan = df_terrorism.loc[df_terrorism['Country'] == 'Pakistan']
df_pakistan_grouped = df_pakistan.groupby('Year', as_index=False)['Number_of_Killed'].sum()

# Interactive scatter with an OLS trend line drawn in red.
fig = px.scatter(
    df_pakistan_grouped,
    x='Year',
    y='Number_of_Killed',
    trendline='ols',
    trendline_color_override='red',
)

# Tilt the x tick labels and set the axis labels and title in one layout update.
fig.update_layout(
    xaxis_tickangle=-45,
    xaxis_title='Year',
    yaxis_title='Number of Killed',
    title='Regression Plot of Number of Killed vs Year in Pakistan',
)

fig.show()

# %% [markdown]
# Q: Visualize the number of people killed by year and by country.
# %%
import plotly.express as px

# One bubble per incident: x is the (log-scaled) number killed, y is the year,
# bubble size follows the number killed and color marks the country.
fig = px.scatter(df_terrorism, y='Year', x="Number_of_Killed", size="Number_of_Killed", color="Country",
                 log_x=True, size_max=55, range_y=[1970,2020], range_x=[5,1500])
fig.show()

# Download the countries GeoJSON file into the working directory. The standard
# library is used instead of the `! wget` shell magic so the cell is valid
# Python and does not depend on wget being installed.
from urllib.request import urlretrieve

urlretrieve(
    'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DV0101EN-SkillsNetwork/Data%20Files/world_countries.json',
    'world_countries.json',
)
print('GeoJSON file downloaded!')

# %%
import folium

world_geo = r'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBMDeveloperSkillsNetwork-DV0101EN-SkillsNetwork/Data%20Files/world_countries.json' # geojson file

# Create a plain world map.
world_map = folium.Map(location=[0, 0], zoom_start=2)

# %%
import plotly.express as px

# Aggregate to one row per (Year, Country) holding the total number killed.
# Passing the raw incident rows would give the choropleth many rows per
# country in every animation frame instead of a single total to color by.
# groupby also sorts by Year, so the animation frames run chronologically.
killed_by_country_year = (
    df_terrorism.groupby(['Year', 'Country'], as_index=False)['Number_of_Killed'].sum()
)

# Create the map visualization.
fig = px.choropleth(killed_by_country_year, locations='Country', locationmode='country names',
                    color='Number_of_Killed',
                    title='No of person killed by Country',
                    animation_frame='Year', animation_group="Country")

# Display the map.
fig.show()

# %%
# Set the default figure size BEFORE drawing: rcParams only affects figures
# created afterwards, so setting it after the heatmap left that plot at the
# previous default size.
plt.rcParams['figure.figsize'] = (17,10)
sns.heatmap(df_terrorism[['Year','Day','Month','Suicide','Success','Number_of_Killed']].corr(), annot=True);
sns.pairplot(df_terrorism);